{
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "DTvvySDuazf1"
      },
      "source": [
        "##### Copyright 2023 The Authors.\n",
        "\n",
        "Licensed under the Apache License, Version 2.0 (the \"License\");"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "eghbXjWg32tl"
      },
      "source": [
        "# Image aesthetic scoring with VILA model\n",
        "In this colab, you can try using [VILA](https://openaccess.thecvf.com/content/CVPR2023/html/Ke_VILA_Learning_Image_Aesthetics_From_User_Comments_With_Vision-Language_Pretraining_CVPR_2023_paper.html) model to score the aesthetis of an image. The model leverages aesthetic pretraining on user comment and image pairs, capturing rich aesthetic information."
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "IWCgOGEd33Ei"
      },
      "outputs": [],
      "source": [
        "import tensorflow as tf\n",
        "import tensorflow_hub as hub\n",
        "\n",
        "import requests\n",
        "from PIL import Image\n",
        "from io import BytesIO\n",
        "\n",
        "import matplotlib.pyplot as plt\n",
        "import numpy as np"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "9W8LfqUW4jcY"
      },
      "source": [
        "# Load Model"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "pSWOBLfg4UIw"
      },
      "outputs": [],
      "source": [
        "model_handle = 'https://tfhub.dev/google/vila/image/1'\n",
        "model = hub.load(model_handle)\n",
        "predict_fn = model.signatures['serving_default']"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "DZpRvyHR4iT9"
      },
      "source": [
        "# Util functions\n",
        "\n",
        "Functions to download an image and to show it."
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "CtmHdgwW4gYg"
      },
      "outputs": [],
      "source": [
        "def load_image_from_url(img_url):\n",
        "  \"\"\"Returns an image with shape [1, height, width, num_channels].\"\"\"\n",
        "  user_agent = {'User-agent': 'Colab Sample (https://tensorflow.org)'}\n",
        "  response = requests.get(img_url, headers=user_agent)\n",
        "  image_bytes = BytesIO(response.content)\n",
        "  image = Image.open(image_bytes)\n",
        "  return image, response.content\n",
        "\n",
        "def show_image(image, title=''):\n",
        "  image_size = image.size\n",
        "  plt.imshow(image)\n",
        "  plt.axis('on')\n",
        "  plt.title(title)\n",
        "  plt.show()"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "6fHh03fQ4wpR"
      },
      "source": [
        "# Load an image\n",
        "\n",
        "The prediction will run directly on the bytes loaded, no preprocessing is needed"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "U4nHP1vP4xJa"
      },
      "outputs": [],
      "source": [
        "image_url = 'https://blogger.googleusercontent.com/img/b/R29vZ2xl/AVvXsEgr0DKaAoO6qTrJo3hXP8UM3D4AB8gQeNI22Q2QphBVGgn-5v84tjhH3ZWTlGtlUoPdlcx54dM93Qi04MuN7eBbj9WlT8Qxy6B2Us4kcn_53FH28MnTtGCzMPhjCVGIgXRL8ZEMeO-7iue7sNEGxBtgx2bI-eKDQAondM8Dfjb1FaybFgUQji4UU9-0vQ/s1024/image9.png' #@param {type: 'string'}\n",
        "\n",
        "image, image_bytes = load_image_from_url(image_url)\n",
        "\n",
        "show_image(image)"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "ext9UcGv6AkD"
      },
      "source": [
        "# Run Prediction on a Single Image"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "C-Vgd9Rh4yWV"
      },
      "outputs": [],
      "source": [
        "prediction = predict_fn(tf.constant(image_bytes))\n",
        "print(\"predicted MOS in [0, 1]: \", prediction['predictions'])"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "trhGleI26X49"
      },
      "outputs": [],
      "source": []
    }
  ],
  "metadata": {
    "colab": {
      "last_runtime": {
        "build_target": "//learning/grp/tools/ml_python:ml_notebook",
        "kind": "private"
      },
      "private_outputs": true,
      "provenance": []
    },
    "kernelspec": {
      "display_name": "Python 3",
      "name": "python3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}
